CHARTS

Coffee Quality

Dot Strip Facet

Photo by Marc Babin on Unsplash

Photo by Marc Babin on Unsplash

Without my morning coffee, I’m just like a dried-up piece of goat…
— J.S. Bach


Ingest

country, score, and measures

# Load the cleaned arabica data set; text columns stay as character vectors
# rather than being converted to factors.
df_arabica <- read.csv(
  file = "archetypes/coffee-quality/arabica-data-cleaned.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
df_arabica

# Load the cleaned robusta data set with the same reader settings.
df_robusta <- read.csv(
  file = "archetypes/coffee-quality/robusta-data-cleaned.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
df_robusta

Wrangle

clean and standardize column names

# Arabica: standardize the column names with janitor::clean_names(), then
# keep only the rows whose total_cup_points is strictly positive.
df_arabica_wrangle <- df_arabica %>%
  janitor::clean_names() %>%
  filter(total_cup_points > 0.00)
df_arabica_wrangle

# Robusta: standardize the column names, then rename the five measures whose
# cleaned names differ so they line up with the arabica measure names used
# when the two data sets are combined.
df_robusta_wrangle <- rename(
  janitor::clean_names(df_robusta),
  acidity = salt_acid,
  sweetness = bitter_sweet,
  aroma = fragrance_aroma,
  body = mouthfeel,
  uniformity = uniform_cup
)

# Ad-hoc check of the lowest robusta score, left disabled:
#min(df_robusta_wrangle$total_cup_points)

df_robusta_wrangle

Wrangle

merge data sets

# Diagnostic table: arabica column names next to robusta column names, to
# eyeball whether the two data sets line up position by position.
# NOTE(review): the `$robusta` assignment requires both data sets to have the
# same number of columns.
field_match <- as.data.frame(names(df_arabica_wrangle))
field_match$robusta <- names(df_robusta_wrangle)
field_match

# Reduce each data set to the country plus the nine shared quality measures.
df_arabica_select <- select(
  df_arabica_wrangle,
  country_of_origin,
  acidity, aftertaste, aroma, balance, body,
  clean_cup, flavor, sweetness, uniformity
)
df_robusta_select <- select(
  df_robusta_wrangle,
  country_of_origin,
  acidity, aftertaste, aroma, balance, body,
  clean_cup, flavor, sweetness, uniformity
)

# Stack arabica on top of robusta, drop rows with any missing value, then
# drop rows whose country of origin is an empty string.
df_all <- rbind(df_arabica_select, df_robusta_select)
df_all <- df_all[complete.cases(df_all), ]
df_all <- df_all %>% filter(nchar(country_of_origin) > 0)
df_all

Analyze

calculate z-scores for each measure

# Standardize every quality measure across all rows of df_all so the measures
# can be compared on a common scale:
#
#   z = (value - mean(value)) / sd(value)
#
# mean() and sd() are taken over the whole column (all countries together).
# No na.rm handling is used here; df_all is reduced to complete cases before
# this step.
#
# Result: one row per coffee with country_of_origin and the nine *_zs columns.
z_scores <- df_all %>%
  mutate(acidity_zs = (acidity - mean(acidity)) / sd(acidity),
         aftertaste_zs = (aftertaste - mean(aftertaste)) / sd(aftertaste),
         aroma_zs = (aroma - mean(aroma)) / sd(aroma),
         balance_zs = (balance - mean(balance)) / sd(balance),
         # Fixed: the numerator previously used `acidity`, which made body_zs
         # a shifted/rescaled copy of acidity instead of a z-score of body.
         body_zs = (body - mean(body)) / sd(body),
         clean_cup_zs = (clean_cup - mean(clean_cup)) / sd(clean_cup),
         flavor_zs = (flavor - mean(flavor)) / sd(flavor),
         sweetness_zs = (sweetness - mean(sweetness)) / sd(sweetness),
         uniformity_zs = (uniformity - mean(uniformity)) / sd(uniformity)
  ) %>%
  # Keep only the identifier and the standardized columns.
  select(country_of_origin,
         acidity_zs,
         aftertaste_zs,
         aroma_zs,
         balance_zs,
         body_zs,
         clean_cup_zs,
         flavor_zs,
         sweetness_zs,
         uniformity_zs)

z_scores

Wrangle

transform from wide to long

# Raw scores in long form: one row per (coffee, measure) pair, keyed by
# country_of_origin, with the measure name in `variable` and its score in
# `value`. Rows with a score of 5.0 or below are dropped.
df_long <- df_all %>%
  melt(id.vars = "country_of_origin") %>%
  filter(value > 5.0)

# Same reshape for the standardized scores (no score filter applied).
df_long_zs <- z_scores %>%
  melt(id.vars = "country_of_origin")
df_long_zs
  • acidity
  • aftertaste
  • aroma
  • balance
  • body
  • clean-cup
  • flavor
  • sweetness
  • uniformity

Plot

as dot strips of each quality measure, faceted by country of origin, with point opacity scaled by score

# Shared theme overrides, meant to be added after a complete base theme.
theme_opts <- theme(
  # Typography
  text = element_text(family = "inconsolata", size = 16),
  plot.title = element_text(colour = "black", size = 16, face = "bold"),
  plot.subtitle = element_text(colour = "black", size = 12),
  plot.caption = element_text(colour = "#555555", size = 11),

  # Outer margin in inches: top, right, bottom, left.
  plot.margin = margin(t = 0.25, r = 1, b = 0.25, l = 0.25, unit = "in"),

  # Strip backgrounds and borders; only the major horizontal grid lines
  # are left in place.
  plot.background = element_blank(),
  panel.border = element_blank(),
  panel.background = element_blank(),
  panel.grid.minor.x = element_blank(),
  panel.grid.major.x = element_blank(),
  panel.grid.minor.y = element_blank(),
  # panel.grid.major.y = element_blank(),

  # Rotate the x-axis labels to vertical so the measure names fit under
  # each strip.
  axis.text.x = element_text(
    angle = 90,
    vjust = 0.5,
    hjust = 1,
    face = "bold",
    margin = margin(t = 8.5)
  ),
  # axis.title.y = element_blank(),
  # axis.ticks.y = element_blank(),
  axis.ticks.x = element_blank(),
  # axis.ticks.x = element_line(),
  # axis.ticks.length.x = unit(.35, "cm"),
  # axis.text.x = element_blank(),

  # Alpha is the only mapped aesthetic besides position; hide its legend.
  legend.position = "none"
)

# Dot strip plot: one panel per country of origin, one vertical strip of
# points per quality measure. Point opacity follows the score, so higher
# scores render more opaque.
v1 <- ggplot(
  data = df_long,
  mapping = aes(x = variable, y = value, alpha = value)
) +
  geom_point(colour = "#8D6E63", size = 1) +
  scale_alpha_continuous(range = c(0.1, 0.5)) +
  scale_y_continuous() +
  scale_x_discrete() +
  # Let points and labels draw outside the panel area instead of clipping.
  coord_cartesian(clip = "off") +
  labs(title = "Coffee Quality", subtitle = NULL, x = NULL, y = NULL) +
  facet_wrap(~country_of_origin, ncol = 4) +
  # Base theme first, then the shared overrides on top of it.
  theme_bw() +
  theme_opts

# Render the plot as an SVG widget (12 x 16 in), rescaled to 75% width.
girafe(
  ggobj = v1,
  width_svg = 12,
  height_svg = 16,
  options = list(opts_sizing(rescale = TRUE, width = 0.75))
)

References

The citations and data sources used for this case

  • Narrative and Data Source, The Economist, The glass-ceiling index, GO